pacman::p_load(tidyverse, data.table, broom,lfe,fixest,ggpubr,modelsummary,kableExtra)
rm(list=ls())
options("modelsummary_format_numeric_latex" = "plain")
################################################################################

product_list = c("Maize","Cake, maize","Oil, maize","Maize, green",
                 "Meat, cattle","Meat, cattle, boneless (beef & veal)",
                 "Soybeans","Cake, soybeans","Oil, soybean",
                 "Rice, milled","Rice, husked","Rice, paddy","Rice, broken","Rice, milled/husked","Cake, rice bran",
                 "Sunflower seed","Oil, sunflower","Cake, sunflower",
                 "Wheat",
                 "Sugar refined","Sugar Raw Centrifugal")


############################ Substitution across counties

df = setDT(read.csv(gzfile(paste0("inputs/data_demand_county_commodity.csv.gz"),"rt"), header=T))
df[, c('X_ijt_X_njt','p_ijt','s_ij') := list( X_ijt/X_njt, X_ijt/Q_ijt, X_ij/X_i)][, c('z_ijt') := list(W_nt*s_ij)]

#Variables
df$y = log(df$X_ijt_X_njt)
df$x <- log(df$p_ijt)
df$iv <- log(df$z_ijt)
df_sample = df[is.finite(df$y) & is.finite(df$x)  & is.finite(df$iv), ]

#Specifications
m_ols1 = felm(y ~ x | njtc| 0 | ij, data=df_sample)
m_iv1 = felm(y ~ 1 | njtc | (x~iv) | ij, data=df_sample)
m_iv1_fe = feols(y ~ 1 | njtc | x~iv, cluster = c('ij'), data=df_sample)
Fstat_iv1 = as.character(round(m_iv1$stage1$iv1fstat$x[5],0))

#Fixed effects
df = data.frame(fixef(m_iv1_fe)$njtc)
df$njtc = rownames(df)
names(df)[1] <- "FE_njtc"
setDT(df)
df_fe = df

#Recover taste shifters
df = merge(df_sample,df_fe,by=c('njtc'))
df$a_ijtc = exp(resid(m_iv1))
df_lower = df[,list(year,origin_id,origin_nation,destination_id,product,a_ijtc)]



############################ Substitution across nations 

df = fread("inputs/data_demand_nation.csv.gz")[product %in% product_list,]
df[, c('X_njt_X_jt','p_njt','s_nj') := list( X_njt/X_jt, X_njt/Q_njt, X_nj/X_n )][, c('z_njt') := list(W_nt*s_nj)]

#Variables
df$origin_id=toupper(df$origin_id)
df$y = log(df$X_njt_X_jt)
df$x <- log(df$p_njt)
df$iv <- log(df$z_njt)
df_sample = df[is.finite(df$y) & is.finite(df$x)  & is.finite(df$iv), ]

#Specifications
m_ols2 = felm(y ~ x | jtc| 0 | nj, data=df_sample)
m_iv2 = felm(y ~ 1 | jtc | (x~iv) | nj, data=df_sample)
m_iv2_fe = feols(y ~ 1 | jtc | x~iv, cluster = c('nj'), data=df_sample)
Fstat_iv2 = as.character(round(m_iv2$stage1$iv1fstat$x[5],0))

#Fixed effects
df = data.frame(fixef(m_iv2_fe)$jtc)
df$jtc = rownames(df)
names(df)[1] <- "FE_jtc"
setDT(df)
df_fe = df

#Recover taste shifters
df = merge(df_sample,df_fe,by=c('jtc'))
df$a_njtc = exp(resid(m_iv2))

df_middle = df[,list(year,origin_id,destination_id,product,a_njtc)]



############################ Substitution across commodities 

df = fread("inputs/data_demand_commodity.csv.gz")[product %in% product_list,]

#Variables
df$y = log(df$X_jtc_X_jt)
df$x <- log(df$p_jtc)
df$iv <- log(df$z_jtc)
df = df[is.finite(df$y) & is.finite(df$x)  & is.finite(df$iv), ]
df_sample = df

#Specifications
m_ols3 = felm(y ~ x | jt| 0 | destination_id, data=df_sample)
m_iv3 = felm(y ~ 1 | jt | (x~iv) | destination_id, data=df_sample)
m_iv3_fe = feols(y ~ 1 | jt | x~iv, cluster = c('destination_id'), data=df_sample)
Fstat_iv3 = as.character(round(m_iv3$stage1$iv1fstat$x[5],0))

#Fixed effects
df = data.frame(fixef(m_iv3_fe)$jt)
df$jt = rownames(df)
names(df)[1] <- "FE_jt"
setDT(df)
df_fe = df

#Recover taste shifters
df = merge(df_sample,df_fe,by=c('jt'))
df$a_jtc = exp(resid(m_iv3))
df_upper = df[,list(year,destination_id,product,a_jtc,X_jt)]



############################ Tables

tab_title = paste0('\\label{tab: estimation - demand}Demand substitution elasticities.')
rows <- tribble(~term,  ~OLS, ~IV,~OLS,~IV,~OLS,~IV,
                'KP-Wald First stage F-statistic', '',Fstat_iv1,'',Fstat_iv2,'',Fstat_iv3)
attr(rows, 'position') <- c(3)
gm <- tibble::tribble(~raw,        ~clean,          ~fmt,
                      "nobs",      "Observations",  0)
notes_content = list('Notes:')
results_table_tex <- msummary(list('OLS'=m_ols1,'IV'=m_iv1,'OLS'=m_ols2,'IV'=m_iv2,'OLS'=m_ols3,'IV'=m_iv3), 
                              title = tab_title, 
                              coef_omit = 'Intercept',coef_map =  c('`x(fit)`'='log (price)','x'='log (price)','x(fit)'= 'log (price)'),
                              estimate = "{estimate}{stars}",
                              escape=F,
                              gof_map = gm,
                              add_rows = rows, notes=notes_content,
                              output='latex') %>% add_header_above(c(" "=1, "(across counties)" = 2, "(across nations)" = 2, "(across commodities)" = 2)) %>% add_header_above(c(" "=1, "Lower level" = 2, "Middle level" = 2, "Upper level" = 2), line=F) %>% add_header_above(c(" "=1, "Dep. var.: log (expenditure share)" = 6), line=F)

results_table_tex = results_table_tex %>% kable_styling(latex_options = "HOLD_position")
kableExtra::save_kable(results_table_tex, file = "../../output/tables/table4_demand.tex")



############################ Export parameters

#Lower level
df_l = df_lower
setnames(df_l, old='product', new='commodity')
df_aux1 = df_l[origin_nation=='ARGENTINA',]
df_aux1$commodity = 'beef'
df_aux2 = df_l[origin_nation=='ARGENTINA',]
df_aux2$commodity = 'maize'
df_l = rbind(df_l,df_aux1,df_aux2)

#Middle level
df = df_middle
df$commodity = ''
df[product %in% c("Cake, soybeans","Soybeans","Oil, soybean"),]$commodity = 'soybean'
df[product %in% c("Maize","Oil, maize","Cake, maize","Maize, green"),]$commodity = 'maize'
df[product %in% c("Meat, cattle","Meat, cattle, boneless (beef & veal)"),]$commodity = 'beef'
df[product %in% c("Rice, paddy","Rice, broken","Rice, husked","Cake, rice bran","Rice, milled","Rice, milled/husked"),]$commodity = 'rice'
df[product %in% c("Sugar refined","Sugar Raw Centrifugal"),]$commodity = 'sugarcane'
df[product %in% c("Sunflower seed","Oil, sunflower","Cake, sunflower"),]$commodity = 'sunflower'
df[product %in% c("Wheat"),]$commodity = 'wheat'
df_m = df[,c('year','origin_id','destination_id','commodity','a_njtc')][, lapply(.SD, mean, na.rm=TRUE), by=list(year,origin_id,destination_id,commodity)]
setnames(df_m, old='origin_id', new='origin_nation')

#Upper level
df = df_upper
df$commodity = ''
df[product %in% c("Soybeans"),]$commodity = 'soybean'
df[product %in% c("Maize","Maize, green"),]$commodity = 'maize'
df[product %in% c("Meat, cattle"),]$commodity = 'beef'
df[product %in% c("Rice, paddy"),]$commodity = 'rice'
df[product %in% c("Sunflower seed"),]$commodity = 'sugarcane'
df[product %in% c("Wheat" ),]$commodity = 'wheat'
df_u = df[,c('year','destination_id','commodity','a_jtc','X_jt')][, lapply(.SD, mean, na.rm=TRUE), by=list(year,destination_id,commodity)]

#Merge
df = merge(df_l,df_m, by=c('year','origin_nation','destination_id','commodity'))
df = merge(df, df_u, by=c('year','destination_id','commodity'))

#Back out CES parameters
df$eta_l = 1-m_iv1$coefficients[1]
df$eta_m = 1-m_iv2$coefficients[1]
df$eta_u = 1-m_iv3$coefficients[1]
df_x = df

df = fread("../../data/trade/clean/country_codes.csv.gz")
df$destination_continent=toupper(df$continent)
df$destination_id=toupper(df$country)
df = df[,list(destination_continent,destination_id)]
df_y=df

df = merge(df_x,df_y,by=c('destination_id'))
setcolorder(df,c("year","origin_nation","origin_id","destination_id","destination_continent","commodity",
                 "a_ijtc","a_njtc","a_jtc","X_jt","eta_l","eta_m","eta_u"))
write.csv(df, "inputs/parameters_demand.csv", row.names = FALSE)

